*===============================================================================
*Firm and Country Embedded Productivity (Alviarez, Cravino and Ramondo) 
*Constructiong bilateral shares at the level of the firm, sin(phi)
*Last Modified by Vanessa Alviarez 04/2022
*===============================================================================
global typeden=1
include "TFP_firm_directory_historical.do"
set memory 64g
global lf "LF"


** Checking the difference between the old and the new data **
clear all
use year isocode sector1 sector data_source GO_usd EMPE VA_usd LAB_usd GO_usd_exp using "${output}/klems_oecd_unido_orbis_sales_emp_exp.dta", clear
foreach vv in GO_usd EMPE VA_usd LAB_usd GO_usd_exp data_source {
display "`vv'"
rename `vv' `vv'_old
}

merge m:1 year sector isocode using "${output}/klems_oecd_comb_selectvar_v2.dta", keepusing(EMPE GO_usd LAB_usd COMP_usd data_source) 
drop if _merge==2 
drop _merge
foreach vv in EMPE GO_usd LAB_usd COMP_usd data_source {
display "`vv'"
rename `vv' `vv'_new
}
keep if year==2016
sort isocode sector1 sector
order year isocode sector1 sector data_source* GO_usd_old GO_usd_new EMPE_old EMPE_new LAB_usd_old LAB_usd_new COMP_usd_new

foreach vv in EMPE* GO_usd* LAB_usd* COMP_usd*  {
display "`vv'"
format %9.0fc `vv' 
}


** Introducing the age of the firm **
/*
set memory 64g
clear all
use if year>1 & year<=1990 using "F:\Dropbox (UBC-Umich)\Research_Projects\Project Javier and Natalia\TFP firm\TFP_firm_data\ORBIS_2018\orbis_batch2021_collapsed\incorporation_year.dta", clear
collapse (min) year_unit=year, by(bvdidnumber)
save "${output}/year_1_1990.dta", replace

clear all
use if year>1990 & year<=2000 using "F:\Dropbox (UBC-Umich)\Research_Projects\Project Javier and Natalia\TFP firm\TFP_firm_data\ORBIS_2018\orbis_batch2021_collapsed\incorporation_year.dta", clear
collapse (min) year_unit=year, by(bvdidnumber)
save "${output}/year_1990_2000.dta", replace

clear all
use if year>2000 & year<=2005 using "F:\Dropbox (UBC-Umich)\Research_Projects\Project Javier and Natalia\TFP firm\TFP_firm_data\ORBIS_2018\orbis_batch2021_collapsed\incorporation_year.dta", clear
collapse (min) year_unit=year, by(bvdidnumber)
save "${output}/year_2000_2005.dta", replace

clear all
use if year>2005 & year<=2010 using "F:\Dropbox (UBC-Umich)\Research_Projects\Project Javier and Natalia\TFP firm\TFP_firm_data\ORBIS_2018\orbis_batch2021_collapsed\incorporation_year.dta", clear
collapse (min) year_unit=year, by(bvdidnumber)
save "${output}/year_2005_2010.dta", replace

clear all
use if year>2010 & year<=2015 using "F:\Dropbox (UBC-Umich)\Research_Projects\Project Javier and Natalia\TFP firm\TFP_firm_data\ORBIS_2018\orbis_batch2021_collapsed\incorporation_year.dta", clear
collapse (min) year_unit=year, by(bvdidnumber)
save "${output}/year_2010_2015.dta", replace

clear all
use if year>2015 using "F:\Dropbox (UBC-Umich)\Research_Projects\Project Javier and Natalia\TFP firm\TFP_firm_data\ORBIS_2018\orbis_batch2021_collapsed\incorporation_year.dta", clear
collapse (min) year_unit=year, by(bvdidnumber)
save "${output}/year_2015.dta", replace
*/


*====================================================================
*Step 1: Data with common set of firms
*====================================================================
clear all
set more off

local var0 sales
local varset sales emp exports va labcost
*local types "naics naics3 naics4"
local types "naics"


etime, start
foreach type in `types' {

*local type="naics"
display "`type'"

use year isocode id_bvd guo_bvd hq `varset' sector* NAICS* con_code using "${output}/firm_allyears_sales.dta", clear 
drop if `var0'==. | `var0'==0
tab con_code
drop if con_code=="$lf"
drop con_code
tab sector

**Bring the age of the firm**
gen bvdidnumber=id_bvd
foreach xx in year_1_1990  year_1990_2000 year_2000_2005 year_2005_2010 year_2010_2015 year_2015 {
display "`xx'"
merge m:1 bvdidnumber using "${output}/`xx'.dta", keepusing(year_unit) update
drop if _merge==2
drop _merge 
}
tempfile temp
save `temp', replace 

gen year_unit2=year_unit
replace year_unit2=1890 if year_unit2<=1890
gen firm_age=year-year_unit
gen firm_age2=year-year_unit2

*15 (16) and 20 (21) years is the median and average (5 year or less takes aboyt 10% of the observations, 10years or less is about 25% of the observations)
sum firm_age firm_age2 if year==2016, d 
$conditionforage 


if "`type'"=="naics4" {
drop if sector=="NA"
tab sector
local industry NAICS4 
}
if "`type'"=="naics3" {
drop if sector=="NA"
tab sector
local industry NAICS3 
}
if "`type'"=="naics" {
drop if sector=="NA"
tab sector
local industry sector 
}
if "`type'"=="market" {
keep if sector1=="Manufacturing (C)" | sector1=="Market_Services (G-H-I-J-K-M-N-R-S-T)" | sector1=="Other_Goods (A-B-D-E-F)" 
drop sector*
gen sector="Market (MARKT)"
gen sector1=sector
local industry sector 
}
if "`type'"=="MS" {
keep if sector1=="Manufacturing (C)" |  sector1=="Market_Services (G-H-I-J-K-M-N-R-S-T)" 
drop sector*
gen sector="MS"
gen sector1=sector
local industry sector 
}
if "`type'"=="manuf" {
keep if sector1=="Manufacturing (C)" 
drop sector*
gen sector="Manufacturing (C)"
gen sector1=sector
local industry sector 
}
if "`type'"=="serv" {
keep if sector1=="Market_Services (G-H-I-J-K-M-N-R-S-T)" 
drop sector*
gen sector="Market_Services (G-H-I-J-K-M-N-R-S-T)"
gen sector1=sector
local industry sector 
}
*

*Collapse at the level of the parent (this includes the sales of the GU-isocode at home as well as in other countries)
sort year isocode sector1 sector `industry' hq guo_bvd
collapse (sum) `varset' (max) firm_age firm_age2, by(year isocode sector1 sector `industry' hq guo_bvd)

**I am computing MNC dummy (WITHIN A SECTOR) based on the number of countries  
*by year sector1 sector guo_bvd isocode, sort: gen a=_n==1
*tab a
*by year sector1 sector guo_bvd, sort: egen num_iso=total(a)
*drop a

*I am computing MNC dummy (across ALL SECTORS) based on the number of countries  
by year guo_bvd isocode, sort: gen a=_n==1
tab a
by year guo_bvd, sort: egen num_iso=total(a)
drop a

gen MNC=0
replace MNC=1 if num_iso>1
tab MNC
tab MNC if isocode!=hq
drop if MNC==0 & isocode==hq
tab MNC
drop if MNC==0 
*The above line keeps firms with only one affiliate but in a foreign country
*Rather not, you need to be in at least 2 countries (at the sector level) to be in the sample

**********************************************************
foreach vv of local varset {
display "`vv'"
rename `vv' sin_`vv'
}
**********************************************************
sort year hq isocode
tempfile p1
save `p1', replace

keep if sin_`var0'!=. & sin_`var0'!=0 
compress
tempfile p2
save `p2', replace


*-----------------------------------------
*Bring production Klems-OECD data --- Xn, Xnn, Xn-exp (this files were prepared in 'aggregate_data_sales.do' files ----
*-----------------------------------------
*Variables: GO_usd GO_usd_exp sales Xnn Xnn_orbis Xnn2
*GO_usd (sales in MM USD) ---- yn/yi ----
*sales (sales in MM USD -- as measured in Orbis)  ---- yn/yi ----
*GO_usd_exp (revenue - exports MM USD)  ---- yn_exp/yi_exp -----
*Xnn (revenue by local producers, this is excluding the sales of foreign affiliates) ----- ynn/yii -----
*Xnn2 (Xnn but replaces with Xnn_orbis whenever itcontains missing values) ----- ynn/yii -------
*klems_oecd_unido_orbis_sales_emp_exp_0514
clear all
use `p2', clear
merge m:1 year sector isocode using "${output}/klems_oecd_unido_orbis_sales_emp_exp.dta", keepusing(EMP* GO_usd GO_usd_exp VA_usd exports LAB_usd data_source) 
keep if year>=2005 & year<=2017
drop if _merge==2 
drop _merge

***I have to change this***
gen Xnn2=GO_usd
***I have to change***
rename VA_usd yn_va_ko
rename EMPE yn_emp_ko
rename exports yn_exp_ko
rename GO_usd yn_`var0'_ko
rename LAB_usd yn_labcost_ko
rename Xnn2 ynn_`var0'_ko
rename GO_usd_exp yn_`var0'_exp_ko
format %9.0fc yn_`var0'_ko ynn_`var0'_ko yn_`var0'_exp_ko yn_exp_ko yn_va_ko yn_labcost_ko
rename data_source data_source_0


*Now introduce the aggregates using KLEMS-NEW 
merge m:1 year sector isocode using "${output}/klems_oecd_comb_selectvar_v2.dta", keepusing(EMPE GO_usd LAB_usd COMP_usd data_source) 
drop if _merge==2 
drop _merge
rename EMPE yn_emp_ko_new
rename GO_usd yn_`var0'_ko_new
rename LAB_usd yn_labcost_ko_new
rename COMP_usd yn_comp_ko_new
format %9.0fc yn_emp_ko_new yn_`var0'_ko_new yn_labcost_ko_new yn_comp_ko_new
rename data_source data_source_1
local newlist yn_emp_ko_new yn_`var0'_ko_new yn_labcost_ko_new yn_comp_ko_new
display "`newlist'"


*Now introduce the aggregates using ORBIS 
merge m:1 year sector isocode using "${output}/orbis_historical_agg_allyears_allvariables.dta", keepusing(*_orbis_all)
drop if _merge==2 
drop _merge
*Renaming ORBIS variables 
rename GO_usd_orbis_all GO_usd_orbis
rename VA_usd_orbis_all VA_usd_orbis
rename EMP_orbis_all EMP_orbis
rename EXP_usd_orbis_all EXP_usd_orbis
rename LAB_usd_orbis_all LAB_usd_orbis

gen GO_usd_exp_orbis=GO_usd_orbis-EXP_usd_orbis
sum GO_usd_orbis VA_usd_orbis EMP_orbis EXP_usd_orbis GO_usd_exp_orbis 
rename GO_usd_orbis yn_`var0'_ko_orbis
rename LAB_usd_orbis yn_labcost_ko_orbis 
rename VA_usd_orbis yn_va_ko_orbis
rename EMP_orbis yn_emp_ko_orbis
rename EXP_usd_orbis yn_exp_ko_orbis
rename GO_usd_exp_orbis yn_`var0'_exp_ko_orbis
local orbislist  yn_`var0'_ko_orbis yn_va_ko_orbis yn_emp_ko_orbis yn_labcost_ko_orbis yn_exp_ko_orbis yn_`var0'_exp_ko_orbis
display "`orbislist'"


*Now introduce the aggregates using ORBIS 
foreach nn in 100 50 20 {
display "`nn'"
merge m:1 year sector isocode using "${tfp_bef}/ORBIS_2018/orbis_batch2021_collapsed/orbis_historical_agg_allyears_all_`nn'largest.dta", keepusing(*_orbis_all)
drop if _merge==2 
drop _merge
*Renaming ORBIS variables 
rename GO_usd_orbis_all GO_usd_orbis`nn'
rename VA_usd_orbis_all VA_usd_orbis`nn'
rename EMP_orbis_all EMP_orbis`nn'
rename EXP_usd_orbis_all EXP_usd_orbis`nn'
rename LAB_usd_orbis_all LAB_usd_orbis`nn'

gen GO_usd_exp_orbis`nn'=GO_usd_orbis`nn'-EXP_usd_orbis`nn'
sum GO_usd_orbis`nn' VA_usd_orbis`nn' EMP_orbis`nn' EXP_usd_orbis`nn' GO_usd_exp_orbis`nn' 
rename GO_usd_orbis`nn' yn_`var0'_ko_orbis`nn'
rename LAB_usd_orbis`nn' yn_labcost_ko_orbis`nn' 
rename VA_usd_orbis`nn' yn_va_ko_orbis`nn'
rename EMP_orbis`nn' yn_emp_ko_orbis`nn'
rename EXP_usd_orbis`nn' yn_exp_ko_orbis`nn'
rename GO_usd_exp_orbis`nn' yn_`var0'_exp_ko_orbis`nn'
local orbislist`nn'  yn_`var0'_ko_orbis`nn' yn_va_ko_orbis`nn' yn_emp_ko_orbis`nn' yn_labcost_ko_orbis`nn' yn_exp_ko_orbis`nn' yn_`var0'_exp_ko_orbis`nn'
display "`orbislist`nn''"
}
local orbislist2 `orbislist100' `orbislist50' `orbislist20'
tempfile t0
save `t0', replace


*-----------------------------------
use `t0', clear
keep year isocode sector1 sector `industry' yn_`var0'_ko yn_emp_ko  yn_va_ko ynn_`var0'_ko yn_`var0'_exp_ko  yn_exp_ko yn_labcost_ko `orbislist' `orbislist2' `newlist'
duplicates drop 
collapse (sum) yn_`var0'_ko yn_emp_ko yn_va_ko ynn_`var0'_ko yn_`var0'_exp_ko  yn_exp_ko yn_labcost_ko `orbislist' `orbislist2' `newlist', by(year isocode sector1 sector `industry')
tempfile t1
save `t1', replace
*--------------------------------------


************** I dont see where I generate the shares -- They are generated in line 132
use `t0', clear
collapse (sum) sin_sales sin_emp sin_va sin_exp sin_labcost (max) firm_age firm_age2, by(year guo_bvd isocode hq  sector1 sector `industry')
merge m:1 year isocode sector1 sector `industry' using `t1'
drop _merge

foreach i in sin_sales sin_emp sin_va sin_exp sin_labcost yn_`var0'_ko yn_emp_ko ynn_`var0'_ko yn_`var0'_exp_ko  yn_exp_ko yn_labcost_ko  `orbislist' `orbislist2' `newlist' {
replace `i'=. if `i'==0
}
tempfile final
save `final', replace


*--------------------------------------------------
*Construct the relevant shares 
*--------------------------------------------------
use `final', clear

display "Drop firms below 100,000 USD" 
sum sin_`var0', d
sum sin_`var0' if sin_`var0'<0.1 
drop if sin_`var0'<0.1 


gen sin_emp_ko=sin_emp/yn_emp_ko
gen sin_va_ko=sin_va/yn_va_ko
gen sin_labcost_ko=sin_labcost/yn_labcost_ko
gen sin_`var0'_ko=sin_`var0'/yn_`var0'_ko
gen sin_`var0'_local_ko=sin_`var0'/ynn_`var0'_ko
gen sin_`var0'_exp_ko=(sin_`var0'-sin_exp)/yn_`var0'_exp_ko

**** Alternatives to use exports (replace with zeros, missing values outside manufacturing)****
sum yn_sales_exp_ko if sector1=="Other_Goods (A-B-D-E-F)"
sum yn_sales_exp_ko if sector1=="Manufacturing (C)"
sum yn_sales_exp_ko if sector1=="Market_Services (G-H-I-J-K-M-N-R-S-T)"
sum yn_sales_exp_ko if sector1=="Non-Market Economy"


***********************************************
gen dummy_temp=0
foreach xx in DE EE FR GB GR HR HU SI {
display "`xx'"
replace dummy_temp=1 if isocode=="`xx'"
}

*** ALT 1***** Notice we are not substracting exports from the denominator whenever exports are missing and therefore replaced by zeros
*tab sector1
*gen xx=sin_exp 
*replace xx=0 if xx==. 
*gen sin_`var0'_exp_ko_alt=(sin_`var0'-xx)/yn_`var0'_ko if sector1=="Manufacturing (C)" & xx==0
*replace sin_`var0'_exp_ko_alt=(sin_`var0'-xx)/yn_`var0'_exp_ko if sector1=="Manufacturing (C)" & xx!=0
*replace sin_`var0'_exp_ko_alt=sin_`var0'_ko if sector1!="Manufacturing (C)"
*drop xx

*** ALT 2***** Only using data when export data is available (of course, substracting also exports from the denominator. Only for the countries in question) --- we will present with the original baseline and an altyernative baseline only using firms with export information for the group of selected countries. 
tab sector1
gen xx=sin_exp 
gen sin_`var0'_exp_ko_alt=sin_`var0'_ko
replace sin_`var0'_exp_ko_alt=(sin_`var0'-xx)/yn_`var0'_exp_ko if sector1=="Manufacturing (C)" & dummy_temp==1
drop xx

*If you want a baseline that uses only firms with exports
replace sin_`var0'_ko=. if sin_`var0'_exp_ko_alt==. & sector1=="Manufacturing (C)" & dummy_temp==1

*** ALT 3***** replace with zero the numerator when not available, but change also the denominator (BUT ONLY IN THESE COUNTRIES)
*tab sector1
*gen xx=sin_exp 
*replace xx=0 if xx==.
*gen sin_`var0'_exp_ko_alt=sin_`var0'_ko
*replace sin_`var0'_exp_ko_alt=(sin_`var0'-xx)/yn_`var0'_exp_ko if sector1=="Manufacturing (C)" & dummy_temp==1
*drop xx

drop dummy_temp


*** TBD ****
preserve 
collapse (sum) sin_exp, by(isocode year sector)
save "${output}/checking_EE_mncexp.dta", replace
restore
*** TBD ****


sum sin_`var0'_ko sin_`var0'_exp_ko sin_`var0'_exp_ko_alt

*HK, SG and JP are the isocodes barely affected by this (I have to take a look to the aggregates for JP in 2017) 
tab isocode if sin_sales_ko>=1 | sin_sales_ko<=0 | sin_sales_ko==.
tab isocode if sin_`var0'_exp_ko>=1 | sin_`var0'_exp_ko<=0
tab isocode if sin_`var0'_exp_ko_alt>=1 | sin_`var0'_exp_ko_alt<=0
sum sin_`var0'_exp_ko sin_`var0'_exp_ko_alt sin_sales_ko

drop if sin_sales_ko>=1 | sin_sales_ko<=0 | sin_sales_ko==.
replace sin_emp_ko=. if sin_emp_ko>=1 | sin_emp_ko<=0
replace sin_va_ko=. if sin_va_ko>=1 | sin_va_ko<=0
replace sin_labcost_ko=. if sin_labcost_ko>=1 | sin_labcost_ko<=0

replace sin_`var0'_exp_ko=0.99 if sin_`var0'_exp_ko>=1 & sin_`var0'_exp_ko!=.
replace sin_`var0'_exp_ko=0.01 if  sin_`var0'_exp_ko<=0 & sin_`var0'_exp_ko!=.
replace sin_`var0'_exp_ko_alt=0.99 if sin_`var0'_exp_ko_alt>=1 & sin_`var0'_exp_ko_alt!=.
replace sin_`var0'_exp_ko_alt=0.01 if  sin_`var0'_exp_ko_alt<=0 & sin_`var0'_exp_ko_alt!=.
**** Above: I just want to make sure that the same number of firms as in the baseline ***** Now I have to think if the same should be done with the rest of the variables 

tab isocode sector if sin_`var0'_exp_ko!=.
tab isocode sector if sin_`var0'_exp_ko_alt!=.


*Including this for NEW-KLEMS aggregates in the denominator
*---------------------------------------
gen sin_labcost_ko_new=sin_labcost/yn_labcost_ko_new
gen sin_comp_ko_new=sin_labcost/yn_comp_ko_new
gen sin_`var0'_ko_new=sin_`var0'/yn_`var0'_ko_new
gen sin_emp_ko_new=sin_emp/yn_emp_ko_new

*HK, SG and JP are the isocodes barely affected by this (I have to take a look to the aggregates for JP in 2017) 
replace sin_labcost_ko_new=. if sin_labcost_ko_new>=1 | sin_labcost_ko_new<=0
replace sin_comp_ko_new=. if sin_comp_ko_new>=1 | sin_comp_ko_new<=0
replace sin_`var0'_ko_new=. if sin_`var0'_ko_new>=1 | sin_`var0'_ko_new<=0
replace sin_emp_ko_new=. if sin_emp_ko_new>=1 | sin_emp_ko_new<=0


*Including this for ORBIS aggregates in the denominator
*---------------------------------------
gen sin_emp_ko_orbis=sin_emp/yn_emp_ko_orbis
gen sin_va_ko_orbis=sin_va/yn_va_ko_orbis
gen sin_labcost_ko_orbis=sin_labcost/yn_labcost_ko_orbis
gen sin_`var0'_ko_orbis=sin_`var0'/yn_`var0'_ko_orbis
gen sin_`var0'_exp_ko_orbis=(sin_`var0'-sin_exp)/yn_`var0'_exp_ko_orbis

**** Alternatives to use exports (replace with zeros, missing values outside manufacturing)****
gen xx=sin_exp 
replace xx=0 if xx==. & sector1!="Manufacturing (C)"
gen sin_`var0'_exp_ko_orbis_alt=(sin_`var0'-xx)/yn_`var0'_exp_ko_orbis
drop xx

*HK, SG and JP are the isocodes barely affected by this (I have to take a look to the aggregates for JP in 2017) 
replace sin_sales_ko_orbis=. if sin_sales_ko_orbis>=1 | sin_sales_ko_orbis<=0
replace sin_emp_ko_orbis=. if sin_emp_ko_orbis>=1 | sin_emp_ko_orbis<=0
replace sin_va_ko_orbis=. if sin_va_ko_orbis>=1 | sin_va_ko_orbis<=0
replace sin_labcost_ko_orbis=. if sin_labcost_ko_orbis>=1 | sin_labcost_ko_orbis<=0
replace sin_`var0'_exp_ko_orbis=. if sin_`var0'_exp_ko_orbis>=1 | sin_`var0'_exp_ko_orbis<=0
replace sin_`var0'_exp_ko_orbis_alt=. if sin_`var0'_exp_ko_orbis_alt>=1 | sin_`var0'_exp_ko_orbis_alt<=0


*Including this for ORBIS aggregates in the denominator (first ## countries) 
*---------------------------------------
foreach nn in 100 50 20 {
display "`nn'"

gen sin_emp_ko_orbis`nn'=sin_emp/yn_emp_ko_orbis`nn'
gen sin_va_ko_orbis`nn'=sin_va/yn_va_ko_orbis`nn'
gen sin_labcost_ko_orbis`nn'=sin_labcost/yn_labcost_ko_orbis`nn'
gen sin_`var0'_ko_orbis`nn'=sin_`var0'/yn_`var0'_ko_orbis`nn'
gen sin_`var0'_exp_ko_orbis`nn'=(sin_`var0'-sin_exp)/yn_`var0'_exp_ko_orbis`nn'

*HK, SG and JP are the isocodes barely affected by this (I have to take a look to the aggregates for JP in 2017) 
replace sin_sales_ko_orbis`nn'=. if sin_sales_ko_orbis`nn'>=1 | sin_sales_ko_orbis`nn'<=0
replace sin_emp_ko_orbis`nn'=. if sin_emp_ko_orbis`nn'>=1 | sin_emp_ko_orbis`nn'<=0
replace sin_va_ko_orbis`nn'=. if sin_va_ko_orbis`nn'>=1 | sin_va_ko_orbis`nn'<=0
replace sin_labcost_ko_orbis`nn'=. if sin_labcost_ko_orbis`nn'>=1 | sin_labcost_ko_orbis`nn'<=0
replace sin_`var0'_exp_ko_orbis`nn'=. if sin_`var0'_exp_ko_orbis`nn'>=1 | sin_`var0'_exp_ko_orbis`nn'<=0
}


*--------------------------------------------------
*Create lags (growth excercise)
*--------------------------------------------------
*local y=2017
*local gap=7
*local y0=`y'-`gap'
*display "`y0'"
egen id = group(isocode sector1 sector `industry' guo_bvd) 
sort id year
xtset id year 
foreach vv in sales emp {
foreach tt in 1 2 3 5 6 7 8 9 10 11 {
display "`vv'; `tt'"
by id, sort: gen l`tt'_sin_`vv'_ko = l`tt'.sin_`vv'_ko
label var l`tt'_sin_`vv'_ko "lag `tt' of sin_`vv'_ko"
}
}
drop id
xtset, clear


*Keep only MNC (two countries, which includes two or more affiliates) and IN THE SECTOR 
*----------------------------------------------
by year sector guo_bvd isocode, sort: gen a=_n==1
by year sector guo_bvd, sort: egen b=total(a)
tab b 
keep if b>=2
drop a b 
format %9.4fc sin* 
format %9.0fc *`var0' yn* 
sort sector1 sector `industry' hq isocode
order sector1 sector `industry' hq isocode sin_`var0'_ko sin_`var0'_local_ko sin_`var0'_exp_ko sin_`var0'_exp_ko_alt


*--------------------------------------------------
*Create percentiles 
*--------------------------------------------------
foreach pos in 10 20 30 40 50 60 70 80 90 {
display `pos'
gsort year isocode sector1 sector `industry' -sin_sales
by year isocode sector1 sector `industry': egen p`pos'_sales = pctile(sin_sales), p(`pos')
format %9.3fc p`pos'_sales
label var p`pos'_sales "pctile `pos' of sin_sales"
}
*

*--------------------------------------------------
*Number of countries in which the affiliate operates (outside home)
*--------------------------------------------------
sort year sector1 `industry' hq guo_bvd isocode
by year sector1 `industry' hq guo_bvd isocode, sort: gen a=_n==1
replace a=0 if isocode==hq
by year sector1 `industry' hq guo_bvd, sort: egen num_ctry=total(a)
drop a
label var num_ctry "number of foreign countries GUO operates within `type'"

tempfile LCS
save `LCS', replace


*Firms in the Largest Connected Set (all checked, naics, naics3 and naics4, are all connected)
*----------------------------------------------
use `LCS', clear
keep year `industry' guo_bvd isocode iso sin_`var0' sin_`var0'_ko yn_`var0'_ko
duplicates drop
sort year isocode guo_bvd
egen iso=group(isocode)
egen guo=group(guo_bvd)
egen sss=group(`industry')
order year `industry' guo_bvd isocode sss guo iso sin_`var0' sin_`var0'_ko yn_`var0'_ko
sort year sss iso guo 
keep if isocode!=""
keep if isocode=="JP" | isocode=="IT" | isocode=="KR" | isocode=="DE" | isocode=="FR" | isocode=="ES" |  isocode=="MX" |  isocode=="GB" |  isocode=="PL" |  isocode=="GR" |  isocode=="NL" |  isocode=="RO" |  isocode=="BE" |  isocode=="AT" |  isocode=="PT" |  isocode=="FI" |  isocode=="SE" |  isocode=="CZ" |  isocode=="SK" |  isocode=="BG" |  isocode=="HU" |  isocode=="HR" |  isocode=="LV" |  isocode=="SI" |  isocode=="LT" |  isocode=="EE" 
save "${output}/LCS_`type'.dta", replace 
keep year sss iso guo sin_`var0' sin_`var0'_ko yn_`var0'_ko
sort year sss iso guo 
order year sss guo iso sin_`var0' sin_`var0'_ko yn_`var0'_ko 
export delimited using "${output}/LCS_`type'", novarnames replace


*labeling variables of interest 
*------------------------
use `LCS', clear
label var sin_emp_ko "sin_emp/yn_emp (den KLEMS/OECD)"
label var sin_va_ko "sin_va/yn_va (den KLEMS/OECD)"
label var sin_`var0'_ko "sin_`var0'/yn (den KLEMS/OECD)"
label var sin_`var0'_local_ko "sin_`var0'/ynn (den KLEMS/OECD)"
label var sin_`var0'_exp_ko "(sin_`var0'-sin_exp)/yn_`var0'_exp_ko (den KLEMS/OECD/WIOT)"
label var sin_`var0'_exp_ko_alt "(sin_`var0'-sin_exp)/yn_`var0'_exp_ko (den KLEMS/OECD/WIOT)-- zeros if different from manufacturing"

label var sin_`var0' "`var0' of i MNCs operating in n (cond operating in i)"
label var sin_emp "emp of i MNCs operating in n (cond operating in i)"
label var sin_exp "exports of i MNCs operating in n (cond operating in i)"
label var sin_va "value added of i MNCs operating in n (cond operating in i)"
label var sin_labcost "Labor cost of i MNCs operating in n (cond operating in i)"

label var yn_`var0'_ko "total `var0' in country n"
label var yn_emp_ko "total emp in country n"
label var yn_va_ko "total va in country n"
label var yn_labcost_ko "total labor cost in country n"
label var yn_exp_ko "total exports in country n"
label var ynn_`var0'_ko "total `var0' of local firms in country n"
label var yn_`var0'_exp_ko "total `var0' (minus exports) in country n"


*Labing the variable for KLEMS-NEW
*---------------------------
label var sin_labcost_ko_new "sin_labcost/yn_va (den new)"
label var sin_comp_ko_new "sin_labcost/yn_va (den new)"
label var sin_`var0'_ko_new "sin_`var0'/yn (den new)"
label var sin_emp_ko_new "sin_emp/yn_emp (den new)"

label var yn_`var0'_ko_new "total `var0' in country n (new)"
label var yn_emp_ko_new "total emp in country n (new)"
label var yn_labcost_ko_new "total labor cost in country n (new)"
label var yn_comp_ko_new "total compensation in country n (new)"


*Labing the variable for ORBIS
*---------------------------
label var sin_emp_ko_orbis "sin_emp/yn_emp (den orbis)"
label var sin_va_ko_orbis "sin_va/yn_va (den orbis)"
label var sin_labcost_ko_orbis "sin_labcost/yn_va (den orbis)"
label var sin_`var0'_ko_orbis "sin_`var0'/yn (den orbis)"
label var sin_`var0'_exp_ko_orbis "(sin_`var0'-sin_exp)/yn_`var0'_exp_ko (den orbis)"
label var sin_`var0'_exp_ko_orbis_alt "(sin_`var0'-sin_exp)/yn_`var0'_exp_ko (den orbis)-- zeros if different from manufacturing"

label var yn_`var0'_ko_orbis "total `var0' in country n (orbis)"
label var yn_emp_ko_orbis "total emp in country n (orbis)"
label var yn_va_ko_orbis "total va in country n (orbis)"
label var yn_labcost_ko_orbis "total labor cost in country n (orbis)"
label var yn_exp_ko_orbis "total exports in country n (orbis)"
label var yn_`var0'_exp_ko_orbis "total `var0' (minus exports) in country n (orbis)"
compress


tempfile tt_b0
save `tt_b0', replace

*Saving the file
*------------------------
drop if year==.
order year sector1 sector `industry' hq isocode sin_`var0'_ko sin_`var0'_local_ko sin_`var0'_exp_ko 
compress
tempfile sin_firmlevel_`type'
save `sin_firmlevel_`type'', replace
}
etime 



*===============================================================================
*Computing the Largest Connected Set (all sector are connected in all years!)
*===============================================================================
/*
local types "naics naics3 naics4"

foreach type in `types' {
clear all
import delimited "${output}/LCS_`type'_fromMatlab.csv"
rename v1 year
rename v2 sss
rename v3 guo
rename v4 iso
duplicates drop 
tempfile LCS_matlab
save `LCS_matlab', replace

use "${output}/LCS_`type'.dta", clear
merge 1:1 year sss guo iso using `LCS_matlab'
tab _merge
}
*
*/

*===============================================================================
*Step 2(b): Drop outlier observations 
*===============================================================================
display "Drop foreing affiliates below 1MM USD and Parents below 5MM USD"
clear all
local whattype naics
use `sin_firmlevel_`whattype'', clear

sum sin_`var0', d
sum sin_`var0' if sin_`var0'<1 
tab isocode if sin_`var0'>=1
display "Drop firms below 1 MILLION USD" 
*drop if sin_`var0'<1 

duplicates drop 
compress 
tempfile sample_firms
save `sample_firms', replace




*===============================================================================
*Temporary check on gravity variables
*===============================================================================
use "${inputs}/gravdata_00_16.dta", clear
keep if year==2016
replace year=2017
tempfile temp
save `temp', replace

use "${inputs}/gravdata_00_16.dta", clear
append using `temp'
tempfile gravdata_00_17
save `gravdata_00_17', replace



*===============================================================================
*Step 2: Additions: include gravity variables and a dummy of constant MNC across years  
*===============================================================================
clear all
set more off

etime, start 
foreach type in `types' {

*local type naics  
display "`type'"

if "`type'"=="naics4" {
local industry NAICS4 
}
if "`type'"=="naics3" {
local industry NAICS3 
}
else {
local industry sector
}

*Bring gravity variables 
*-----------------------------------------------
use `sin_firmlevel_`type'', clear


*Drop the outliers
*----------------------------------------------- 
merge m:1 year hq isocode sector guo_bvd using `sample_firms', keepusing(year hq isocode guo_bvd)
keep if _merge==3
drop _merge
*-----------------------------------------------


gen iso2_o=hq 
gen iso2_d=isocode
local trade_var distw fta_wto contig comlang_off colony  pop_o pop_d gdp_o gdp_d gdpcap_o gdpcap_d   
merge m:1 iso2_o iso2_d year using `gravdata_00_17', keepusing(`trade_var')

drop if _merge==2
drop _merge
foreach var in iso2_o iso2_d {
encode `var', gen(`var'b)
drop `var'
rename `var'b `var'
}
*
gen distance=ln(distw)
drop distw
drop iso2_o iso2_d
rename fta_wto RTA
label var distance "ln distance" 
label var RTA "Regional Trade Aggrement" 
label var comlang_off "Common Language" 
label var contig "commnon Border" 
label var colony "Former Colony" 
compress


*Flag MNCs number of years it has information (out of yy years)
*-----------------------------------------------
by sector1 sector guo_bvd isocode year, sort: gen a=_n==1
replace a=0 if year<2010
by sector1 sector guo_bvd isocode, sort: egen years_after2010=total(a)
drop a
by sector1 sector guo_bvd isocode year, sort: gen a=_n==1
by sector1 sector guo_bvd isocode, sort: egen years_allperiod=total(a)
drop a
gen sample_const=1
compress

****** ERASE ******
****** ERASE ******
****** ERASE ******
******replace sin_`var0'_ko=. if sin_`var0'_exp_ko_alt==.
****** ERASE ******
****** ERASE ******
****** ERASE ******
save  "${output}/sin_firmlevel_`type'_gravity${lf}_all.dta", replace
}
etime




/*
******************************************************************************
clear all
local type naics 
global lf "LF"
use "${output}/sin_firmlevel_`type'_gravity${lf}_all.dta", clear 

keep if year==2016 
keep if isocode!=""
keep if isocode=="DK" | isocode=="JP" | isocode=="IT" | isocode=="KR" | isocode=="DE" | isocode=="FR" | isocode=="ES" |  isocode=="MX" |  isocode=="GB" |  isocode=="PL" |  isocode=="GR" |  isocode=="NL" |  isocode=="RO" |  isocode=="BE" |  isocode=="AT" |  isocode=="PT" |  isocode=="FI" |  isocode=="SE" |  isocode=="CZ" |  isocode=="SK" |  isocode=="BG" |  isocode=="HU" |  isocode=="HR" |  isocode=="LV" |  isocode=="SI" |  isocode=="LT" |  isocode=="EE" 

collapse (count) sin_sales_ko sin_sales_exp_ko sin_sales_exp_ko_alt, by(year sector1 isocode) 

sum sin_sales_ko
gsort year sector1- sin_sales_exp_ko

br year sector1 sector hq isocode guo_bvd sin_sales_ko sin_sales_ko_new
sum sin_sales_ko sin_sales_ko_new if isocode=="JP" & year==2016
sum sin_sales_ko sin_sales_ko_new if isocode!="JP" & year==2016
br year sector1 sector hq isocode sin_sales_ko sin_labcost_ko sin_sales_ko_orbis sin_labcost_ko_orbis yn_sales_ko yn_labcost_ko yn_sales_ko_orbis yn_labcost_ko_orbis

*use `sin_firmlevel_`type'', clear
******************************************************************************





*** Vanessa checking June, 2022


clear all
local type naics 
global lf "LF"
use "${output}/sin_firmlevel_`type'_gravity${lf}.dta", clear 

keep if year==2016 
keep if isocode!=""
keep if isocode=="DK" | isocode=="JP" | isocode=="IT" | isocode=="KR" | isocode=="DE" | isocode=="FR" | isocode=="ES" |  isocode=="MX" |  isocode=="GB" |  isocode=="PL" |  isocode=="GR" |  isocode=="NL" |  isocode=="RO" |  isocode=="BE" |  isocode=="AT" |  isocode=="PT" |  isocode=="FI" |  isocode=="SE" |  isocode=="CZ" |  isocode=="SK" |  isocode=="BG" |  isocode=="HU" |  isocode=="HR" |  isocode=="LV" |  isocode=="SI" |  isocode=="LT" |  isocode=="EE" 

br year sector1 sector hq isocode guo_bvd sin_sales_ko 
sum sin_sales_ko

*--------------------------------
clear all
local type naics 
global lf "LF"
use "${output}/sin_firmlevel_`type'_gravity${lf}_sawb.dta", clear 

keep if year==2016 
keep if isocode!=""
keep if isocode=="DK" | isocode=="JP" | isocode=="IT" | isocode=="KR" | isocode=="DE" | isocode=="FR" | isocode=="ES" |  isocode=="MX" |  isocode=="GB" |  isocode=="PL" |  isocode=="GR" |  isocode=="NL" |  isocode=="RO" |  isocode=="BE" |  isocode=="AT" |  isocode=="PT" |  isocode=="FI" |  isocode=="SE" |  isocode=="CZ" |  isocode=="SK" |  isocode=="BG" |  isocode=="HU" |  isocode=="HR" |  isocode=="LV" |  isocode=="SI" |  isocode=="LT" |  isocode=="EE" 

br year sector1 sector hq isocode guo_bvd sin_sales_ko 
sum sin_sales_ko




